Obtém informações do conjunto de dados AutoMPG Dataset
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(datasets)
# NOTE(review): mpg is presumably the data set shipped with ggplot2 (attached
# above as a plotly dependency) rather than one from 'datasets' -- confirm.
# Number of rows (observations) in mpg.
nrow(mpg)
## [1] 234
# Number of columns (attributes) in mpg.
ncol(mpg)
## [1] 11
library(plotly)
# Line-chart demo: three synthetic series of 100 normal draws each, centred
# on 5, 0 and -5.
trace_0 <- rnorm(100, mean = 5)
trace_1 <- rnorm(100, mean = 0)
trace_2 <- rnorm(100, mean = -5)
x <- 1:100
data <- data.frame(x, trace_0, trace_1, trace_2)

# plot_ly() only carries the shared data and x mapping. Each trace declares
# type = "scatter" itself so plotly does not have to infer the trace type
# (the inference is what emits the "No trace type specified" messages).
# The type is deliberately NOT set in plot_ly(): doing so would make plot_ly()
# contribute a trace of its own, which here would have no y values.
fig <- plot_ly(data, x = ~x)
fig <- fig %>%
  add_trace(y = ~trace_0, name = "Padrão 1", type = "scatter", mode = "lines")
fig <- fig %>%
  add_trace(
    y = ~trace_1, name = "Padrão 2", type = "scatter", mode = "lines+markers"
  )
fig <- fig %>%
  add_trace(y = ~trace_2, name = "Padrão 3", type = "scatter", mode = "markers")
fig
Segue um gráfico de barras para o conjunto AutoMPG Dataset:
# Bar chart built from mpg: model on the x axis, year on the y axis.
fig <- mpg %>%
  plot_ly(x = ~model, y = ~year, type = "bar")
fig
Filtrando instâncias
# Show the structure of mpg: each column's name, type and first values.
str(mpg)
## tibble [234 × 11] (S3: tbl_df/tbl/data.frame)
## $ manufacturer: chr [1:234] "audi" "audi" "audi" "audi" ...
## $ model : chr [1:234] "a4" "a4" "a4" "a4" ...
## $ displ : num [1:234] 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
## $ year : int [1:234] 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
## $ cyl : int [1:234] 4 4 4 4 6 6 6 4 4 4 ...
## $ trans : chr [1:234] "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
## $ drv : chr [1:234] "f" "f" "f" "f" ...
## $ cty : int [1:234] 18 21 20 21 16 18 18 18 16 20 ...
## $ hwy : int [1:234] 29 29 31 30 26 26 27 26 25 28 ...
## $ fl : chr [1:234] "p" "p" "p" "p" ...
## $ class : chr [1:234] "compact" "compact" "compact" "compact" ...
Contando os valores do atributo “manufacturer” e pegando esses valores:
# Number of rows per manufacturer; table() orders its entries by the sorted
# manufacturer names.
counts <- table(mpg$manufacturer)
# Take the labels from the table itself so that values[i] always names
# counts[i]. unique() returns first-appearance order, which matches table()'s
# sorted order only when the rows happen to be sorted by manufacturer.
values <- names(counts)
Nosso primeiro gráfico de barras (contagem de carros por fabricante):
# Bar chart of how many cars each manufacturer has in mpg.
fig <- plot_ly(x = values, y = counts, type = "bar", name = values)
# The x axis holds manufacturers and the y axis their counts, so the titles
# say exactly that (the previous 'Modelo'/'Ano' titles described another plot).
fig <- fig %>%
  layout(
    xaxis = list(title = "Fabricante"),
    yaxis = list(title = "Quantidade")
  )
fig
Podemos testar também o gráfico de barras empilhado:
# Manufacturer frequencies, recomputed here (the plot below does not use them).
counts <- table(mpg$manufacturer)
# Same model-by-year bar chart as before.
fig <- mpg %>%
  plot_ly(x = ~model, y = ~year, type = "bar")
# Stacked layout, currently disabled:
# fig <- fig %>% layout(yaxis = list(title = 'Count'), barmode = 'stack')
fig
Vamos comparar agora apenas os carros das fabricantes Toyota e Volkswagen
# Rows of mpg belonging to each of the two manufacturers being compared.
volks <- mpg[mpg$manufacturer == "volkswagen", ]
toyota <- mpg[mpg$manufacturer == "toyota", ]
Como fica a Toyota:
# Print the Toyota subset.
toyota
Como fica Volkswagen:
# Print the Volkswagen subset.
volks

# Pie chart of the Volkswagen cars by year. Only `labels` is supplied: with no
# `values`, each slice is sized by how many times its label occurs. Pie traces
# have no `x` attribute, so it is not passed (it only triggered a warning).
fig <- plot_ly(data = volks, labels = ~year, type = "pie")
fig

# Pie chart of the Toyota cars by class, built the same way.
fig <- plot_ly(data = toyota, labels = ~class, type = "pie")
fig
# Box plots comparing the displ column (engine displacement) of the two
# manufacturers. The traces are named after what they show; the earlier
# "Prices ..." names did not match the plotted variable.
fig <- plot_ly(y = volks$displ, type = "box", name = "Cilindrada Volkswagen")
fig <- fig %>% add_trace(y = toyota$displ, name = "Cilindrada Toyota")
fig
# Five-number summary plus mean, for comparison with the Volkswagen box.
summary(volks$displ)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.800 2.000 2.000 2.256 2.500 3.600
# Same comparison, additionally drawing every observation next to its box.
fig <- plot_ly(
  y = volks$displ,
  type = "box",
  boxpoints = "all",
  name = "Cilindrada Volkswagen"
)
fig <- fig %>% add_trace(y = toyota$displ, name = "Cilindrada Toyota")
fig
# Read the world GDP table (2014, with country codes, per the file name) from
# the plotly datasets repository.
df <- read.csv(
  "https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv"
)
# df
# table(df)
Plota o mapa com as informações solicitadas
# World map shaded by GDP: regions are located by the CODE column and the
# country name is attached as text.
fig <- df %>%
  plot_ly(
    type = "choropleth",
    locations = df$CODE,
    z = df$GDP..BILLIONS.,
    text = df$COUNTRY,
    colorscale = "Blues"
  )
fig
# Scatter plot of petal length against sepal length for iris. The trace type
# and mode are stated explicitly instead of being left to plotly's inference,
# which is what emitted the "No trace type / No scatter mode" messages.
fig <- plot_ly(
  data = iris,
  x = ~Sepal.Length, y = ~Petal.Length,
  type = "scatter", mode = "markers"
)
fig
library(plotly)
# One colour per species; the names tie each colour to a specific level.
pal <- c("red", "blue", "green")
pal <- setNames(pal, c("virginica", "setosa", "versicolor"))
# Same scatter plot, now coloured by species. Type and mode are explicit so
# plotly does not have to infer them (and print messages about doing so).
fig <- plot_ly(
  data = iris,
  x = ~Sepal.Length, y = ~Petal.Length,
  type = "scatter", mode = "markers",
  color = ~Species, colors = pal
)
fig
# One colour per species; the names tie each colour to a specific level.
pal <- c("red", "blue", "green")
pal <- setNames(pal, c("virginica", "setosa", "versicolor"))
# Species-coloured scatter with larger markers outlined in black. Type and
# mode are explicit so plotly does not have to infer them.
fig <- plot_ly(
  data = iris,
  x = ~Sepal.Length, y = ~Petal.Length,
  type = "scatter", mode = "markers",
  color = ~Species, colors = pal,
  marker = list(size = 8, line = list(color = "black", width = 1))
)
fig
# Monochrome scatter where species is encoded by marker symbol instead of
# colour. No palette is needed here, so none is built. The trace type is given
# explicitly (it was previously inferred, with a message).
fig <- plot_ly(
  data = iris,
  x = ~Sepal.Length, y = ~Petal.Length,
  type = "scatter", mode = "markers",
  color = I("black"),
  symbol = ~Species, symbols = c("circle", "x", "o")
)
fig
# Scatter-plot matrix (splom) of the four iris measurements, coloured by
# species.

# Styling applied to the secondary axes (xaxis2..4 and yaxis2..4).
axis <- list(
  showline = FALSE,
  zeroline = FALSE,
  gridcolor = "#ffff",
  ticklen = 4,
  titlefont = list(size = 13)
)

fig <- plot_ly(iris)
fig <- add_trace(
  fig,
  type = "splom",
  dimensions = list(
    list(label = "sepal length", values = ~Sepal.Length),
    list(label = "sepal width", values = ~Sepal.Width),
    list(label = "petal length", values = ~Petal.Length),
    list(label = "petal width", values = ~Petal.Width)
  ),
  color = ~Species,
  colors = c("#636EFA", "#EF553B", "#00CC96"),
  marker = list(size = 8, line = list(color = "black", width = 1))
)
# Hide the diagonal panels (each variable against itself).
fig <- style(fig, diagonal = list(visible = FALSE))
fig <- layout(
  fig,
  hovermode = "closest",
  dragmode = "select",
  plot_bgcolor = "rgba(240,240,240, 0.95)",
  xaxis = list(
    domain = NULL, showline = FALSE, zeroline = FALSE,
    gridcolor = "#ffff", ticklen = 4
  ),
  yaxis = list(
    domain = NULL, showline = FALSE, zeroline = FALSE,
    gridcolor = "#ffff", ticklen = 4
  ),
  xaxis2 = axis,
  xaxis3 = axis,
  xaxis4 = axis,
  yaxis2 = axis,
  yaxis3 = axis,
  yaxis4 = axis
)
fig
# Heatmap of the volcano matrix.
fig <- plot_ly(
  z = volcano,
  type = "heatmap"
)
fig
# Correlation matrix of the first four iris columns (the measurements).
dados <- iris[, seq_len(4)]
# dados.scaled <- scale(dados, center = TRUE, scale = TRUE)
corrIris <- cor(dados)
corrIris
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length 1.0000000 -0.1175698 0.8717538 0.8179411
## Sepal.Width -0.1175698 1.0000000 -0.4284401 -0.3661259
## Petal.Length 0.8717538 -0.4284401 1.0000000 0.9628654
## Petal.Width 0.8179411 -0.3661259 0.9628654 1.0000000
# Axis labels for the heatmap, listed in the same order as corrIris.
dimensions <- list(
  "sepal length",
  "sepal width",
  "petal length",
  "petal width"
)
# Greyscale heatmap of the correlation matrix.
fig <- plot_ly(
  x = dimensions,
  y = dimensions,
  z = corrIris,
  type = "heatmap",
  colors = "Greys"
)
fig
Podemos empregar uma técnica de redução de dimensionalidade para transformar os dados originais definidos em um espaço de alta dimensão para um espaço de dimensão baixa. Para propósitos de visualização, a baixa dimensão é igual a \(2\).
Vamos apresentar duas técnicas de redução de dimensionalidade: Análise de Componentes Principais (Principal Component Analysis - PCA) e t-Distributed Stochastic Neighbor Embedding (t-SNE).
Um bom tutorial sobre PCA pode ser encontrado neste link.
Variância acumulada das componentes principais:
library(plotly)
library(stats)
# PCA on every iris column except Species, keeping two components.
X <- iris[, setdiff(names(iris), "Species")]
prin_comp <- prcomp(X, rank. = 2)
summary(prin_comp)
## Importance of first k=2 (out of 4) components:
## PC1 PC2
## Standard deviation 2.0563 0.49262
## Proportion of Variance 0.9246 0.05307
## Cumulative Proportion 0.9246 0.97769
A proporção de variância concentrada pelas primeiras k componentes principais é dada por:
# Share of the variance captured by each retained principal component.
explained_variance_ratio <-
  summary(prin_comp)[["importance"]]["Proportion of Variance", ]
# Running total over the components. Stored under its own name so the result
# does not mask base::cumsum().
cumulative_variance <- cumsum(explained_variance_ratio)
data <- data.frame(
  Explained_Variance = cumulative_variance,
  Components = seq_along(cumulative_variance)
)
# Filled line chart of cumulative variance against number of components, with
# one tick per component.
fig <- plot_ly(
  data = data,
  x = ~Components, y = ~Explained_Variance,
  type = "scatter", mode = "lines", fill = "tozeroy"
) %>%
  layout(
    xaxis = list(
      title = "# Componentes",
      tickvals = seq_along(cumulative_variance)
    ),
    yaxis = list(title = "Variância Concentrada")
  )
fig
Visualizando o conjunto de dados Iris
# Coordinates of every iris row on the principal components kept by prcomp().
components <- data.frame(prin_comp[["x"]])
# Attach the species as a properly named column so the plot can map it with
# ~Species instead of reaching back into the global iris object (an unnamed
# cbind() would call the column "iris$Species").
components <- cbind(components, Species = iris$Species)
fig <- plot_ly(
  components,
  x = ~PC1, y = ~PC2,
  color = ~Species, colors = c("#636EFA", "#EF553B", "#00CC96"),
  type = "scatter",
  mode = "markers", # explicit, so plotly does not infer it and print a message
  text = ~Species,
  textposition = "auto",
  hoverinfo = "text",
  # hovertemplate = paste('<i>PC1</i>: %{y:.2f}',
  #                       '<br><b>PC2</b>: %{x}<br>',
  #                       '<b>%{text}</b>'),
  marker = list(size = 8, line = list(color = "black", width = 1))
)
fig
#install.packages("tsne")
Agora vamos realizar a apresentação dos dados
library(tsne)
# t-SNE on every iris column except Species; the seed is fixed right before
# the call so that the run is repeatable.
features <- iris[, setdiff(names(iris), "Species")]
set.seed(0)
tsne <- tsne(features, initial_dims = 2, perplexity = 32)
## sigma summary: Min. : 0.389524058138923 |1st Qu. : 0.465656663923391 |Median : 0.525013618999155 |Mean : 0.534049275364469 |3rd Qu. : 0.59709359081531 |Max. : 0.76775512516638 |
## Epoch: Iteration #100 error is: 10.7045314935631
## Epoch: Iteration #200 error is: 0.0543421795362035
## Epoch: Iteration #300 error is: 0.0521840519824475
## Epoch: Iteration #400 error is: 0.051380705408758
## Epoch: Iteration #500 error is: 0.0511819976879906
## Epoch: Iteration #600 error is: 0.0511223834873948
## Epoch: Iteration #700 error is: 0.0511048166892463
## Epoch: Iteration #800 error is: 0.0510989180454898
## Epoch: Iteration #900 error is: 0.0510969852365816
## Epoch: Iteration #1000 error is: 0.0510962022503454
# Turn the t-SNE result into a data frame (the plot below reads its columns
# as X1 and X2).
tsne <- data.frame(tsne)
# Bind the species on as a named column so the plot can split on ~Species
# rather than on the global iris object.
pdb <- cbind(tsne, Species = iris$Species)
fig <- plot_ly(
  data = pdb,
  x = ~X1, y = ~X2,
  type = "scatter",
  mode = "markers", # explicit, so plotly does not infer it and print a message
  marker = list(size = 8, line = list(color = "black", width = 1)),
  split = ~Species
)
fig